# -*- coding: utf-8 -*-

# Scrapy settings for all_jd project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

# Project identity: the bot name and the package that holds the spiders
# (the same package is used for newly generated spiders).
BOT_NAME = "all_jd"

NEWSPIDER_MODULE = "all_jd.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# User-agent header value: a desktop Chrome 73 (Linux x86_64) browser string
# rather than one that identifies the crawler.
USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/73.0.3683.103 Safari/537.36"
)

# robots.txt rules are NOT obeyed by this project.
ROBOTSTXT_OBEY = False

# Log level is set to ERROR.
LOG_LEVEL = "ERROR"

# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# Interval between sending requests, in seconds
DOWNLOAD_DELAY = 2
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
# COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
# }

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#    'all_jd.middlewares.AllJdSpiderMiddleware': 543,
# }

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    # The project's own downloader middleware, registered with order 543.
    "all_jd.middlewares.AllJdDownloaderMiddleware": 543,
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
# }

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    # Pipeline that writes items to a file (the only pipeline enabled).
    "all_jd.pipelines.FilePipeline": 300,
    # Pipeline that writes items to the database (disabled):
    # "all_jd.pipelines.AllJdPipeline": 300,
    # Pipeline for the Crawlab distributed crawler system (disabled):
    # "crawlab.pipelines.CrawlabMongoPipeline": 888,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'


# Imported here so this section is self-contained; used only to read
# environment-variable overrides for the database settings below.
import os

# Connection settings for the MySQL database that stores the crawled data.
# Each value can be overridden with a JD_MYSQL_* environment variable so real
# credentials do not have to be committed to source control. When a variable
# is unset, the original hard-coded value is used as the default.
MYSQL_DATA = {
    'db_name': os.environ.get('JD_MYSQL_DB', 'jd_project'),  # database name
    # Table name. NOTE(review): empty by default -- presumably it has to be
    # filled in before the database pipeline is enabled; confirm.
    'table_name': os.environ.get('JD_MYSQL_TABLE', ''),
    'host': os.environ.get('JD_MYSQL_HOST', '192.168.43.77'),  # database host address
    'user': os.environ.get('JD_MYSQL_USER', 'root'),  # user
    # Environment values are strings, so the port is always coerced to int.
    'port': int(os.environ.get('JD_MYSQL_PORT', 3306)),  # port
    'password': os.environ.get('JD_MYSQL_PASSWORD', '123'),  # password
}

# Connection settings for the MySQL database that stores proxy IPs (the SQL
# file that creates the `ips` table is in the sql directory). Overridable
# through JD_IPS_MYSQL_* environment variables, with the original values as
# defaults.
MYSQL_IPS_CONNECT = {
    'db_name': os.environ.get('JD_IPS_MYSQL_DB', 'ips'),  # database name
    'host': os.environ.get('JD_IPS_MYSQL_HOST', '192.168.43.77'),  # database host address
    'user': os.environ.get('JD_IPS_MYSQL_USER', 'root'),  # user
    # Environment values are strings, so the port is always coerced to int.
    'port': int(os.environ.get('JD_IPS_MYSQL_PORT', 3306)),  # port
    'password': os.environ.get('JD_IPS_MYSQL_PASSWORD', '123'),  # password
}

# JD search query parameter (the default value is Chinese for "computer").
# To crawl other goods, adjust the XPath expressions in jd.py as needed.
GOODS = '电脑'

# Number of product-list pages to crawl
MAX_PAGE = 200


# Cookie values.
# Copy the cookie from this URL:
#   https://search.jd.com/s_new.php?keyword=电脑&page=3&s=90&click=0
# or build your own cookie pool instead.
COOKIE = {
    'shshshfpa': '128310cb-ab12-f617-e5ea-584934d41990-1588639184',
    'shshshfpb': 'wi8AbtcHYwGuDKGrw%2F8DgGA%3D%3D',
    'user-key': 'eccac2dc-59f5-4e03-9816-eb909ed89d7d',
    'cn': '0',
    'areaId': '12',
    'PCSYCityID': 'CN_320000_320100_0',
    '__jdu': '1487876885',
    'ipLoc-djd': '12-904-907-50559',
    'unpl': 'V2_ZzNtbUteFxImXRVVck1VAWIKQFRLBUNAdV9PAHMeVVZiBEIKclRCFnQUR1BnGVoUZwYZXkJcQxVFCEdkeB5fA2AFEFlBZxpFK0oYEDlNDEY1WnxYQFFDF3ALQVxzKWwGZzMSXHJXRhxzAUBTchBYNVcEIm1yU0MUdg92VUsYbEczXxZcQVBGHTgIQ119EFoCbgoWbUNnQA%3d%3d',
    '3AB9D23F7A4B3C9B': 'G2WRCKEE5QMWNSY7X6DCSUILBYJG5SLTAB27FHWC4G3EUYLT5QYZET4O7WBYEJZEX6SNQRXI7WYP73CNFCCKC7GKQM',
    'shshshfp': 'ffa0b3109b6fcd614466bf8b128436d1',
    '__jdv': '76161171|haosou-search|t_262767352_haosousearch|cpc|4371342699_0_88e6bdb08e8548c88c1d1f8e968b46af|1589721194303',
    '__jdc': '122270672',
    '__jda': '122270672.1487876885.1588639180.1589721194.1589725512.16',
    '__jdb': '122270672.4.1487876885|16.1589725512',
    'shshshsID': 'c1e8dcb8ad089424389ec8fb0dd41a57_4_1589725896984',
}







